# Copyright (c) 2024 Bytedance Ltd. and/or its affiliates
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .prompt_templates import *

# Short model alias -> full model identifier string.
# NOTE(review): the values look like Hugging Face Hub repo ids ("org/name");
# confirm against the loader that consumes them. There is no "gpt4o" entry
# here, although model2module does list that alias.
model2name = {
    # Qwen2-VL
    "qwen2-vl-2b": "Qwen/Qwen2-VL-2B-Instruct",
    "qwen2-vl-7b": "Qwen/Qwen2-VL-7B-Instruct",
    "qwen2-vl-72b": "Qwen/Qwen2-VL-72B-Instruct",
    # InternVL 2.5 (MPO)
    "internvl-2.5-2b-mpo": "OpenGVLab/InternVL2_5-2B-MPO",
    "internvl-2.5-4b-mpo": "OpenGVLab/InternVL2_5-4B-MPO",
    "internvl-2.5-8b-mpo": "OpenGVLab/InternVL2_5-8B-MPO",
    "internvl-2.5-26b-mpo": "OpenGVLab/InternVL2_5-26B-MPO",
    "internvl-2.5-38b-mpo": "OpenGVLab/InternVL2_5-38B-MPO",
    "internvl-2.5-78b-mpo": "OpenGVLab/InternVL2_5-78B-MPO",
    # LLaVA-OneVision
    "llava-onevision-0.5b": "lmms-lab/llava-onevision-qwen2-0.5b-ov",
    "llava-onevision-7b": "lmms-lab/llava-onevision-qwen2-7b-ov",
    "llava-onevision-72b": "lmms-lab/llava-onevision-qwen2-72b-ov-sft",
    # LLaVA-Video
    "llava-video-7b": "lmms-lab/LLaVA-Video-7B-Qwen2",
    "llava-video-72b": "lmms-lab/LLaVA-Video-72B-Qwen2",
    # MiniCPM-V
    "minicpm-v-2.6": "openbmb/MiniCPM-V-2_6",
    # VideoScore
    "videoscore": "TIGER-Lab/VideoScore",
    "videoscore-v1.1": "TIGER-Lab/VideoScore-v1.1",
}

# Short model alias -> module name string, written grouped by module so each
# module name appears once. Groups and aliases are listed in the order the
# keys must have in the resulting dict.
# NOTE(review): presumably the module name selects the model wrapper to
# import — confirm against the code that reads this table.
model2module = {
    alias: module
    for module, aliases in (
        ("qwen2vl", ("qwen2-vl-2b", "qwen2-vl-7b", "qwen2-vl-72b")),
        (
            "internvl_2_5",
            (
                "internvl-2.5-2b-mpo",
                "internvl-2.5-4b-mpo",
                "internvl-2.5-8b-mpo",
                "internvl-2.5-26b-mpo",
                "internvl-2.5-38b-mpo",
                "internvl-2.5-78b-mpo",
            ),
        ),
        (
            "llava_ov",
            (
                "llava-onevision-0.5b",
                "llava-onevision-7b",
                "llava-onevision-72b",
            ),
        ),
        ("llava_video", ("llava-video-7b", "llava-video-72b")),
        ("minicpm_v", ("minicpm-v-2.6",)),
        ("gpt4o", ("gpt4o",)),
        ("videoscore", ("videoscore", "videoscore-v1.1")),
    )
    for alias in aliases
}

# Evaluation aspect -> two-element list of answer tokens for that aspect:
# ["yes", "no"] for the alignment / motion-degree / light-change style aspects
# and ["good", "bad"] for the quality style aspects.
# NOTE(review): presumably ordered [positive, negative] and consumed by the
# soft-scoring code — confirm against the caller.
#
# The name is misspelled ("asepct"). It is kept unchanged because other
# modules may import it; a correctly spelled alias is defined right after
# the table.
asepct2scoretokens = {
    "tv_alignment": ["yes", "no"],
    "tv_alignment_appearance": ["yes", "no"],
    "appearance_fine": ["yes", "no"],
    "tv_alignment_motion": ["yes", "no"],
    "motion_fine": ["yes", "no"],
    "static_visual_quality": ["good", "bad"],
    "aesthetic_quality": ["good", "bad"],
    "technical_quality": ["good", "bad"],
    "structural_correctness": ["good", "bad"],
    "dynamic_degree": ["yes", "no"],
    "subject_motion_degree": ["yes", "no"],
    "camera_motion_degree": ["yes", "no"],
    "light_change": ["yes", "no"],
    "temporal_visual_quality": ["good", "bad"],
    "appearance_consistency": ["good", "bad"],
    "flickering": ["good", "bad"],
    "motion_naturalness": ["good", "bad"],
}

# Correctly spelled alias. It is the same dict object, so lookups and any
# mutation are shared between both names.
aspect2scoretokens = asepct2scoretokens

# Evaluation mode -> {aspect -> prompt template}. Every template name comes
# from the star-import of .prompt_templates at the top of this file.
#
# Things visible in the table itself:
#   * "single_soft_adaptive" uses the *_SOFT_YN templates for the alignment,
#     motion-degree and light-change aspects and the *_SOFT_GOOD_BAD
#     templates for the quality aspects.
#   * The two "*_simple_prompt" modes use one generic alignment template for
#     all three tv_alignment* aspects.
#   * "pairwise_no_vid_index" currently lists exactly the same templates as
#     "pairwise" (it is still a separate dict object).
mode2prompt = dict(
    single_hard=dict(
        tv_alignment=TV_ALIGNMENT_SINGLE_HARD,
        tv_alignment_appearance=TV_ALIGNMENT_APPEARANCE_SINGLE_HARD,
        tv_alignment_motion=TV_ALIGNMENT_MOTION_SINGLE_HARD,
        static_visual_quality=STATIC_VISUAL_QUALITY_SINGLE_HARD,
        aesthetic_quality=AESTHETIC_QUALITY_SINGLE_HARD,
        technical_quality=TECHNICAL_QUALITY_SINGLE_HARD,
        structural_correctness=STRUCTURAL_CORRECTNESS_SINGLE_HARD,
        dynamic_degree=DYNAMIC_DEGREE_SINGLE_HARD,
        subject_motion_degree=SUBJECT_MOTION_SINGLE_HARD,
        camera_motion_degree=CAMERA_MOTION_SINGLE_HARD,
        light_change=LIGHT_CHANGE_SINGLE_HARD,
        temporal_visual_quality=TEMPORAL_VISUAL_QUALITY_SINGLE_HARD,
        appearance_consistency=APPEARANCE_CONSISTENCY_SINGLE_HARD,
        flickering=FLICKERING_SINGLE_HARD,
        motion_naturalness=MOTION_NATURALNESS_SINGLE_HARD,
    ),
    single_soft_yn=dict(
        tv_alignment=TV_ALIGNMENT_SINGLE_SOFT_YN,
        tv_alignment_appearance=TV_ALIGNMENT_APPEARANCE_SINGLE_SOFT_YN,
        tv_alignment_motion=TV_ALIGNMENT_MOTION_SINGLE_SOFT_YN,
        static_visual_quality=STATIC_VISUAL_QUALITY_SINGLE_SOFT_YN,
        aesthetic_quality=AESTHETIC_QUALITY_SINGLE_SOFT_YN,
        technical_quality=TECHNICAL_QUALITY_SINGLE_SOFT_YN,
        structural_correctness=STRUCTURAL_CORRECTNESS_SINGLE_SOFT_YN,
        dynamic_degree=DYNAMIC_DEGREE_SINGLE_SOFT_YN,
        subject_motion_degree=SUBJECT_MOTION_SINGLE_SOFT_YN,
        camera_motion_degree=CAMERA_MOTION_SINGLE_SOFT_YN,
        light_change=LIGHT_CHANGE_SINGLE_SOFT_YN,
        temporal_visual_quality=TEMPORAL_VISUAL_QUALITY_SINGLE_SOFT_YN,
        appearance_consistency=APPEARANCE_CONSISTENCY_SINGLE_SOFT_YN,
        flickering=FLICKERING_SINGLE_SOFT_YN,
        motion_naturalness=MOTION_NATURALNESS_SINGLE_SOFT_YN,
    ),
    single_soft_adaptive=dict(
        tv_alignment=TV_ALIGNMENT_SINGLE_SOFT_YN,
        tv_alignment_appearance=TV_ALIGNMENT_APPEARANCE_SINGLE_SOFT_YN,
        tv_alignment_motion=TV_ALIGNMENT_MOTION_SINGLE_SOFT_YN,
        static_visual_quality=STATIC_VISUAL_QUALITY_SINGLE_SOFT_GOOD_BAD,
        aesthetic_quality=AESTHETIC_QUALITY_SINGLE_SOFT_GOOD_BAD,
        technical_quality=TECHNICAL_QUALITY_SINGLE_SOFT_GOOD_BAD,
        structural_correctness=STRUCTURAL_CORRECTNESS_SINGLE_SOFT_GOOD_BAD,
        dynamic_degree=DYNAMIC_DEGREE_SINGLE_SOFT_YN,
        subject_motion_degree=SUBJECT_MOTION_SINGLE_SOFT_YN,
        camera_motion_degree=CAMERA_MOTION_SINGLE_SOFT_YN,
        light_change=LIGHT_CHANGE_SINGLE_SOFT_YN,
        temporal_visual_quality=TEMPORAL_VISUAL_QUALITY_SINGLE_SOFT_GOOD_BAD,
        appearance_consistency=APPEARANCE_CONSISTENCY_SINGLE_SOFT_GOOD_BAD,
        flickering=FLICKERING_SINGLE_SOFT_GOOD_BAD,
        motion_naturalness=MOTION_NATURALNESS_SINGLE_SOFT_GOOD_BAD,
    ),
    single_soft_good_bad=dict(
        tv_alignment=TV_ALIGNMENT_SINGLE_SOFT_GOOD_BAD,
        tv_alignment_appearance=TV_ALIGNMENT_APPEARANCE_SINGLE_SOFT_GOOD_BAD,
        tv_alignment_motion=TV_ALIGNMENT_MOTION_SINGLE_SOFT_GOOD_BAD,
        static_visual_quality=STATIC_VISUAL_QUALITY_SINGLE_SOFT_GOOD_BAD,
        aesthetic_quality=AESTHETIC_QUALITY_SINGLE_SOFT_GOOD_BAD,
        technical_quality=TECHNICAL_QUALITY_SINGLE_SOFT_GOOD_BAD,
        structural_correctness=STRUCTURAL_CORRECTNESS_SINGLE_SOFT_GOOD_BAD,
        dynamic_degree=DYNAMIC_DEGREE_SINGLE_SOFT_GOOD_BAD,
        subject_motion_degree=SUBJECT_MOTION_SINGLE_SOFT_GOOD_BAD,
        camera_motion_degree=CAMERA_MOTION_SINGLE_SOFT_GOOD_BAD,
        light_change=LIGHT_CHANGE_SINGLE_SOFT_GOOD_BAD,
        temporal_visual_quality=TEMPORAL_VISUAL_QUALITY_SINGLE_SOFT_GOOD_BAD,
        appearance_consistency=APPEARANCE_CONSISTENCY_SINGLE_SOFT_GOOD_BAD,
        flickering=FLICKERING_SINGLE_SOFT_GOOD_BAD,
        motion_naturalness=MOTION_NATURALNESS_SINGLE_SOFT_GOOD_BAD,
    ),
    single_soft_yn_simple_prompt=dict(
        tv_alignment=TV_ALIGNMENT_SINGLE_SOFT_SIMPLE,
        tv_alignment_appearance=TV_ALIGNMENT_SINGLE_SOFT_SIMPLE,
        tv_alignment_motion=TV_ALIGNMENT_SINGLE_SOFT_SIMPLE,
        static_visual_quality=STATIC_VISUAL_QUALITY_SINGLE_SOFT_SIMPLE,
        aesthetic_quality=AESTHETIC_QUALITY_SINGLE_SOFT_SIMPLE,
        technical_quality=TECHNICAL_QUALITY_SINGLE_SOFT_SIMPLE,
        structural_correctness=STRUCTURAL_CORRECTNESS_SINGLE_SOFT_SIMPLE,
        dynamic_degree=DYNAMIC_DEGREE_SINGLE_SOFT_SIMPLE,
        subject_motion_degree=SUBJECT_MOTION_SINGLE_SOFT_SIMPLE,
        camera_motion_degree=CAMERA_MOTION_SINGLE_SOFT_SIMPLE,
        light_change=LIGHT_CHANGE_SINGLE_SOFT_SIMPLE,
        temporal_visual_quality=TEMPORAL_VISUAL_QUALITY_SINGLE_SOFT_SIMPLE,
        appearance_consistency=APPEARANCE_CONSISTENCY_SINGLE_SOFT_SIMPLE,
        flickering=FLICKERING_SINGLE_SOFT_SIMPLE,
        motion_naturalness=MOTION_NATURALNESS_SINGLE_SOFT_SIMPLE,
    ),
    pairwise=dict(
        tv_alignment=TV_ALIGNMENT_PAIR,
        tv_alignment_appearance=TV_ALIGNMENT_APPEARANCE_PAIR,
        tv_alignment_motion=TV_ALIGNMENT_MOTION_PAIR,
        dynamic_degree=DYNAMIC_DEGREE_PAIRWISE,
        subject_motion_degree=SUBJECT_MOTION_PAIRWISE,
        camera_motion_degree=CAMERA_MOTION_PAIRWISE,
        light_change=LIGHT_CHANGE_PAIRWISE,
        static_visual_quality=STATIC_VISUAL_QUALITY_PAIRWISE,
        aesthetic_quality=AESTHETIC_QUALITY_PAIRWISE,
        technical_quality=TECHNICAL_QUALITY_PAIRWISE,
        structural_correctness=STRUCTURAL_CORRECTNESS_PAIRWISE,
        temporal_visual_quality=TEMPORAL_VISUAL_QUALITY_PAIRWISE,
        appearance_consistency=APPEARANCE_CONSISTENCY_PAIRWISE,
        flickering=FLICKERING_PAIRWISE,
        motion_naturalness=MOTION_NATURALNESS_PAIRWISE,
    ),
    pairwise_no_vid_index=dict(
        tv_alignment=TV_ALIGNMENT_PAIR,
        tv_alignment_appearance=TV_ALIGNMENT_APPEARANCE_PAIR,
        tv_alignment_motion=TV_ALIGNMENT_MOTION_PAIR,
        dynamic_degree=DYNAMIC_DEGREE_PAIRWISE,
        subject_motion_degree=SUBJECT_MOTION_PAIRWISE,
        camera_motion_degree=CAMERA_MOTION_PAIRWISE,
        light_change=LIGHT_CHANGE_PAIRWISE,
        static_visual_quality=STATIC_VISUAL_QUALITY_PAIRWISE,
        aesthetic_quality=AESTHETIC_QUALITY_PAIRWISE,
        technical_quality=TECHNICAL_QUALITY_PAIRWISE,
        structural_correctness=STRUCTURAL_CORRECTNESS_PAIRWISE,
        temporal_visual_quality=TEMPORAL_VISUAL_QUALITY_PAIRWISE,
        appearance_consistency=APPEARANCE_CONSISTENCY_PAIRWISE,
        flickering=FLICKERING_PAIRWISE,
        motion_naturalness=MOTION_NATURALNESS_PAIRWISE,
    ),
    pairwise_simple_prompt=dict(
        tv_alignment=TV_ALIGNMENT_PAIR_SIMPLE,
        tv_alignment_appearance=TV_ALIGNMENT_PAIR_SIMPLE,
        tv_alignment_motion=TV_ALIGNMENT_PAIR_SIMPLE,
        dynamic_degree=DYNAMIC_DEGREE_PAIRWISE_SIMPLE,
        subject_motion_degree=SUBJECT_MOTION_PAIRWISE_SIMPLE,
        camera_motion_degree=CAMERA_MOTION_PAIRWISE_SIMPLE,
        light_change=LIGHT_CHANGE_PAIRWISE_SIMPLE,
        static_visual_quality=STATIC_VISUAL_QUALITY_PAIRWISE_SIMPLE,
        aesthetic_quality=AESTHETIC_QUALITY_PAIRWISE_SIMPLE,
        technical_quality=TECHNICAL_QUALITY_PAIRWISE_SIMPLE,
        structural_correctness=STRUCTURAL_CORRECTNESS_PAIRWISE_SIMPLE,
        temporal_visual_quality=TEMPORAL_VISUAL_QUALITY_PAIRWISE_SIMPLE,
        appearance_consistency=APPEARANCE_CONSISTENCY_PAIRWISE_SIMPLE,
        flickering=FLICKERING_PAIRWISE_SIMPLE,
        motion_naturalness=MOTION_NATURALNESS_PAIRWISE_SIMPLE,
    ),
)

# Dimension name -> list of this project's aspect keys grouped under it.
# NOTE(review): the dimension names presumably match the VideoScore model's
# output dimensions — confirm against the videoscore module.
# Each value is built as its own list, so no two keys share a list object.
videoscoredim2aspect = {
    dimension: list(aspects)
    for dimension, aspects in (
        (
            "visual quality",
            ("static_visual_quality", "aesthetic_quality", "technical_quality"),
        ),
        ("temporal consistency", ("appearance_consistency",)),
        ("dynamic degree", ("dynamic_degree",)),
        ("text-to-video alignment", ("tv_alignment",)),
        ("factual consistency", ("structural_correctness", "motion_naturalness")),
    )
}

# Aspect key -> dimension name: the reverse direction of the
# dimension -> aspects table, written out as ordered (aspect, dimension)
# pairs. Three aspects map to "visual quality" and two to
# "factual consistency".
aspect2videoscoredim = dict(
    (
        ("static_visual_quality", "visual quality"),
        ("aesthetic_quality", "visual quality"),
        ("technical_quality", "visual quality"),
        ("motion_naturalness", "factual consistency"),
        ("appearance_consistency", "temporal consistency"),
        ("dynamic_degree", "dynamic degree"),
        ("tv_alignment", "text-to-video alignment"),
        ("structural_correctness", "factual consistency"),
    )
)
